package com.tongna.crawler.demo;

import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.plugin.berkeley.BreadthCrawler;
import cn.edu.hfut.dmic.webcollector.plugin.nextfilter.HashSetNextFilter;
import com.tongna.crawler.data.apis.Apis;
import com.tongna.crawler.data.utils.RpcUtils;
import com.tongna.crawler.data.vo.WebSiteVo;

import java.util.List;
/**
 * Breadth-first crawler that extracts a title and a content fragment from each
 * visited page, using the CSS selectors configured on a {@link WebSiteVo}, and
 * submits them through the {@link Apis} RPC client.
 */
public class BidCrawler extends BreadthCrawler {
    /** Site configuration: selectors, id, seeds, regexes and crawl limits. */
    private final WebSiteVo site;

    /**
     * Creates a crawler bound to one site configuration.
     *
     * @param vo site configuration read by {@link #visit(Page, CrawlDatums)}
     */
    public BidCrawler(WebSiteVo vo) {
        super("BidCrawler", true);
        this.site = vo;
    }

    /**
     * Handles one fetched page: prints its URL, then, when both the title and
     * the content selector match at least one element, submits the URL, the
     * title text and the content HTML together with the site id.
     *
     * <p>NOTE: filtering by {@code site.getDetailReg()} is currently not applied,
     * so every fetched page is inspected. Pages that lack either element are
     * skipped explicitly instead of failing with a NullPointerException.
     *
     * @param page the fetched page
     * @param next collector for follow-up links; not modified here
     */
    @Override
    public void visit(Page page, CrawlDatums next) {
        String url = page.url();
        System.out.println(url);
        try {
            // first() yields null on an empty selection, so guard before dereferencing.
            if (page.select(this.site.getTitle()).isEmpty()
                    || page.select(this.site.getContent()).isEmpty()) {
                return;
            }
            String title = page.select(this.site.getTitle()).first().text();
            String content = page.select(this.site.getContent()).first().html();
            // The response body is intentionally discarded; only the submission matters here.
            ((Apis) RpcUtils.get(Apis.class)).add(url, title, content, this.site.getId()).execute().body();
        } catch (Exception e) {
            // Best effort: a failure on one page must not abort the whole crawl.
            System.err.println("Failed to process page: " + url);
            e.printStackTrace();
        }
    }

    /**
     * Builds a crawler from the given configuration, registers its seeds and
     * URL regexes, sets the thread count and runs the crawl to the configured
     * depth. Exceptions thrown by the crawl are printed, not propagated.
     *
     * @param vo site configuration; its seed and regex lists may be null
     */
    public static void start(WebSiteVo vo) {
        BidCrawler crawler = new BidCrawler(vo);

        List<String> seeds = vo.getSeeds();
        if (seeds != null) {
            for (String seed : seeds) {
                crawler.addSeed(seed);
            }
        }

        List<String> regs = vo.getRegs();
        if (regs != null) {
            for (String reg : regs) {
                crawler.addRegex(reg);
            }
        }

        crawler.setThreads(vo.getThreads());
        try {
            crawler.start(vo.getDeep());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: crawls the Jiangsu government procurement site with a
     * hard-coded configuration.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        WebSiteVo vo = new WebSiteVo();
        vo.setDetailReg("http://www.ccgp-jiangsu.gov.cn/pub/jszfcg/cgxx/cggg/.*.html");
        vo.getSeeds().add("http://www.ccgp-jiangsu.gov.cn/");
        vo.getRegs().add("http://www.ccgp-jiangsu.gov.cn/.*.html");
        vo.setMaxNum(Integer.valueOf(10000));
        vo.setDeep(Integer.valueOf(10));
        vo.setThreads(Integer.valueOf(30));
        vo.setTitle("div[class=dtit]");
        vo.setContent("div[class=detail]");
        start(vo);
    }
}